Below is the final script with relevant narrative that produces the heatmap visualisation:
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(tidyr) #Both packages allowed me to clean the data before visualising it.#I had to remove the first few headings to make sure the data read correctly,#and then separated the variable columns from the data as they had merged together.df <-read.csv("gcses-and-equivalent-in-english-and-maths-table-data.csv", skip =7, header =FALSE, quote ="\"", sep =",")names(df) <-c("Area code","Area name","2016-17","2017-18","2018-19", "2019-20","2020-21","2021-22","2022-23","2023-24")#Accounting for a space mark in one of the headings.df <- df %>%filter(`Area code`!="Area code")#Had to separate the data for each year which had also merged together.df_long <- df %>%pivot_longer( cols =`2016-17`:`2023-24`, names_to ="Year", values_to ="Value")#Converted from a wide to a long dataset so that I could demonstrate change over time,#rather than focus on regional trends.#The numbers were originally down as character rather than numeric data, and cleaned #the data to remove N/A values that weren't converted.df_long <- df_long %>%mutate(Value =as.numeric(Value))df_long_clean <- df_long %>%mutate(Value =as.numeric(Value)) %>%filter(!is.na(Value))library(ggplot2)library(tidyverse)
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly) #These packages allowed me to visualise the data and make it interactive.
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
#I wanted to create a heatmap to communicate the bigger picture, demonstrating #trends over time but also accounting for regional differences.df_heatmap <- df_long_clean %>%mutate(`Area name`=fct_reorder(`Area name`, Value, .fun ='mean')) %>%#Mutated the y-axis so it would be shown from highest to lowest regional percentages#rather than alphabetically.ggplot(mapping =aes(x = Year, y =`Area name`, fill = Value)) +geom_tile() +scale_fill_viridis_c(option ="magma", direction =-1) +#Wanted the darker colours to represent higher percentages and vice versa.labs(title ="Attainment of GCSEs in English and Maths across England",x ="Year", y ="Lower-tier/Unitary Authority", fill ="Pass Rate") +theme(axis.text.y =element_text(size =4))df_heatmap
#Made the heatmap longer in order to leave spaces between the y-axis variables,#and used the plotly package to make the map interactive so you can get the specific#percentages of qualifications for each region per academic year.ggplotly(df_heatmap, height =1600)